/*==============================================================================
FILE NAME: Figure_D3.do
CREATED: 6 July 2024
==============================================================================*/

// Wipe data, stored estimates and other session state so the script starts clean.
clear all
// Graph scheme used for every figure exported below.
// NOTE(review): "modern" is presumably a user-installed scheme — confirm it is
// available on the machine running the replication.
set scheme modern

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data" 
	global figures "$rootdir/output/figures" // Define global paths for replication package
} 
*/

// Figure D3 (Panel A and all later panels) — shared data preparation.
// Step 1: build a county -> region_id lookup with one row per county from the
// air panel and save it for the merge further down.
// NOTE(review): the mean only reproduces the region id if region_id is constant
// within county — confirm against Air_Panel.dta.
use "$processed_data/Air_Panel.dta", clear
collapse (mean) region_id, by(county)
// Counties whose region_id is missing cannot be assigned to an area.
keep if region_id != .
save "$processed_data/Region_county_lookup.dta", replace

// Step 2: complaint-level sample — air-related complaints, 2003-2019, tagged
// with border status and with the four area indicators used for the panels.
use "$processed_data/unique_complaints.dta", clear

// Sample window; rows with a missing year fall outside the range and are dropped.
keep if inrange(year, 2003, 2019)

rename (ZipCode_CIN County_CIN) (ZipCode County)
*drop if ZipFlag_CIN == 1
gen complaint = Number_Of_Complaints

// Air incidents only: discard rows flagged 0 and rows where the flag is missing (.).
keep if incident_air != 0 & incident_air != .

// Attach border indicators by county; counties that appear only in the border
// file are not added to the sample.
merge m:1 County using "$processed_data/Border_Counties.dta", keep(master match) nogenerate

// Counties without a match in the border file are treated as non-border.
foreach flag of varlist border_exact border_100km {
	replace `flag' = 0 if `flag' == .
}

// Attach region_id using the lookup saved earlier (key is lower-case "county").
rename County county
merge m:1 county using "$processed_data/Region_county_lookup.dta", keep(master match) nogenerate

// Area indicators (0/1) defined from region_id; a missing region_id yields 0 in all four.
gen Border_and_Permian_Basin = inlist(region_id, 6, 7, 15, 16)
gen Central                  = inlist(region_id, 9, 11, 13)
gen Coastal_and_East         = inlist(region_id, 5, 10, 12, 14)
gen North_Central_and_West   = inlist(region_id, 1, 2, 3, 4, 8)

// Step 3: collapse to one row per ZIP code.
//
// The area indicator is written out in full (Border_and_Permian_Basin). Relying
// on the abbreviation "Border" breaks under `set varabbrev off` and becomes
// ambiguous as soon as another variable starting with "Border" is present.

// 3a. ZIP-year totals: sum complaints, average the border flags, and carry each
//     area indicator forward as 1 if any record in the cell has it set.
collapse (sum) complaint (mean) border_exact border_100km (max) Border_and_Permian_Basin Central Coastal_and_East North_Central_and_West, by(ZipCode year)

// 3b. Average across years. Only ZIP-years that have at least one record exist
//     at this point, so years without any record do not enter the mean.
collapse (mean) complaint border_exact border_100km (max) Border_and_Permian_Basin Central Coastal_and_East North_Central_and_West, by(ZipCode)

// 3c. The averaged border flags can be fractional; round them back to 0/1
//     with a 0.5 cutoff.
foreach flag of varlist border_100km border_exact {
	replace `flag' = 1 if `flag' >= 0.5
	replace `flag' = 0 if `flag' < 0.5
}

// Step 4: attach ZIP-level covariates.

// The demographics file defines the sample: keep matched ZIP codes plus ZIP codes
// that appear only in the demographics file; ZIP codes that have complaint
// records but no demographics row are dropped.
merge 1:1 ZipCode using "$processed_data/zipcode_demographics_new.dta", keep(match using) nogenerate

// Remaining sources only add variables; ZIP codes found only in those files are not added.
foreach src in Emissions_events_count facility_count mean_PM25 {
	merge 1:1 ZipCode using "$processed_data/`src'.dta", keep(master match) nogenerate
}

// A missing count after the merges is recoded to zero (no emissions events,
// no complaints, no facilities recorded for that ZIP code).
foreach cnt of varlist EmissionsEvent complaint count_RN {
	replace `cnt' = 0 if `cnt' == .
}

// Step 5: clean covariates, restrict to complete cases, standardise, and build
// the per-capita outcome.

// For these variables a value of 0 is recoded to missing.
local zero_to_missing tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent median_rent_share
foreach v of local zero_to_missing {
	replace `v' = . if `v' == 0
}

// Complete-case sample: drop any ZIP code with a missing value in one of the covariates.
local required mean_PM25 EmissionsEvent border_100km count_RN tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent median_rent_share avg_commute_time urban_share hisp_share white_share black_share other_share less_HS_share college_share LFPR unemployment_rate child_poverty_rate adult_pov_rate
foreach v of local required {
	drop if `v' == .
}

// Housing age measured relative to the year 2000.
gen median_housing_age = 2000 - median_year_built

// Standardised (std_*) version of each covariate, computed on the retained sample;
// median_housing_age takes the place of median_year_built in this list.
local to_standardise mean_PM25 EmissionsEvent border_100km count_RN tot_pop pop_density avg_hh_size median_hh_inc median_housing_age median_hh_value median_rent median_rent_share avg_commute_time urban_share hisp_share white_share black_share other_share less_HS_share college_share LFPR unemployment_rate child_poverty_rate adult_pov_rate
foreach v of local to_standardise {
	egen std_`v' = std(`v')
}

// Variable labels (the standardised variables are the regressors in the panels below).
// -- environment and location
label var std_mean_PM25 "Average PM2.5 Concentration (2000-2018)"
label var std_count_RN "# of Plants"
label var std_border_100km "Border County"
// -- population and ethnicity
label var std_tot_pop "Total Population in 2000"
label var std_pop_density "Population Density in 2000"
label var std_urban_share "Urban Population Share in 2000"
label var std_hisp_share "Hispanic Share in 2000"
label var std_white_share "White Share in 2000"
label var std_black_share "Black Share in 2000"
label var std_other_share "Other Share in 2000"
// -- income, housing, education
label var std_median_hh_inc "Median Household Income in 2000"
label var std_median_housing_age "Median Housing Age in 2000"
label var std_college_share "Share Some College in 2000"
// -- labour market and poverty
// NOTE(review): "Laborforce" is reproduced as written in the original label — confirm spelling.
label var std_LFPR "Laborforce Participation Rate"
label var std_unemployment_rate "Unemployment Rate"
label var std_child_poverty_rate "Child Poverty Rate"
label var std_adult_pov_rate "Adult Poverty Rate"

// Outcome: complaints per 1,000 people (built in two steps, ratio first, then rescaled).
gen complaints_pc = complaint/tot_pop
replace complaints_pc = complaints_pc*1000

// Figure D3, Panels A-D: The Socio-Demographic and Economic Predictors of
// Air-Related Citizen Complaints, estimated separately for each TCEQ area.
//   Panel A: Border and Permian Basin
//   Panel B: Central
//   Panel C: Coastal and East
//   Panel D: North Central and West
//
// For each area, the log of complaints per 1,000 people is regressed on one
// standardised predictor at a time (robust SEs). The stored estimates are then
// plotted as percent changes, 100*(exp(b)-1), and exported to
// "$figures/Figure_D3_Panel_<letter>.pdf".
//
// The four panels share one loop so the specification and plot options cannot
// drift apart between panels.

// No matching -restore- follows in this section, so the variables generated
// below are discarded when the preserved data are restored.
preserve

// Inverse-hyperbolic-sine outcome (x100).
// NOTE(review): not used by any regression in this section — confirm whether it
// is still needed.
gen complaint_pc_IHS = log(complaints_pc + sqrt(1+complaints_pc^2))
replace complaint_pc_IHS = complaint_pc_IHS*100

// Log outcome: ZIP codes with zero complaints have log(0) = missing and
// therefore do not enter the regressions.
gen logcomplaint_pc = log(complaints_pc)

// Predictors estimated for every area (one bivariate regression each).
local predictors count_RN mean_PM25 border_100km pop_density median_hh_inc median_housing_age urban_share hisp_share white_share black_share other_share college_share LFPR child_poverty_rate adult_pov_rate

// Stored estimates shown in the figure, in plotting order (border_100km is
// estimated above but not plotted).
local plotted count_RN pop_density median_hh_inc median_housing_age urban_share hisp_share white_share black_share other_share college_share LFPR child_poverty_rate adult_pov_rate mean_PM25

// Area indicators and the panel letter each one maps to (position-matched).
local areas Border_and_Permian_Basin Central Coastal_and_East North_Central_and_West
local letters A B C D

// Setting persists across graphs, so it is issued once rather than per panel.
graph set window fontface "Times New Roman"

forvalues p = 1/4 {
	local area : word `p' of `areas'
	local letter : word `p' of `letters'

	// Estimates are stored under the predictor's name and overwritten for each panel.
	foreach x of local predictors {
		reg logcomplaint_pc std_`x' if `area' == 1, r
		est store `x'
	}

	// Options are kept exactly as in the original specification, in the same
	// order. NOTE(review): xlabel(), legend(off) and graphregion(fcolor()) are
	// each given more than once; confirm which specification is intended before
	// pruning any of them.
	coefplot `plotted' , transform(* = 100*(exp(@)-1)) xline(0) pstyle(p1) ///
		ciopts(recast(rcap)) graphregion(fcolor(white)) drop(_cons) offset(0) ///
		mcolor(navy) msize(medsmall) mfcolor(navy) ylabel(,labsize(large)) ///
		xlabel(-20(10)20, nogrid labsize(medsmall)) ///
		xtitle("Percent Change in the Number of Air" "Complaints per 1,000 people", size(large)) ///
		legend(off) ytitle("Standard Deviation Increase in...", size(large)) ///
		graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255)) legend(off) ///
		xlabel(-100(20)100, angle(45) labsize(large)) name(rankchange_bv, replace) xsize(8.6)
	graph export "$figures/Figure_D3_Panel_`letter'.pdf", replace
}